import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import folium
from folium import plugins
from folium.plugins import HeatMap
import datetime
Data extracted from the source is suitable for analysis (no missing values or errors)
vancouver_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Vancouver.csv')
burnaby_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Burnaby.csv')
delta_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Delta.csv')
new_west_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_New_west.csv')
surrey_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Surrey.csv')
coquitlam_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Coquitlam.csv')
richmond_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_Richmond.csv')
north_vancouver_crashes = pd.read_csv(r'\Users\Fernando Ramos\Desktop\Python Projects\Car crashes\Location_Full_Data_data_North_vancouver.csv')
#Combining all files into one for analysis
mainland_crashes = pd.concat([vancouver_crashes,burnaby_crashes,delta_crashes,new_west_crashes,
surrey_crashes,coquitlam_crashes,richmond_crashes,north_vancouver_crashes])
mainland_crashes.head()
| Location | Crash Type | Municipality | Year | Crash Count | Latitude | Longitude | |
|---|---|---|---|---|---|---|---|
| 0 | GRANDVIEW HWY S & TEMPLETON DR | Property damage only | VANCOUVER | 2017 | 1 | 49.259090 | -123.060191 |
| 1 | BOUNDARY RD & REGENT ST | Property damage only | VANCOUVER | 2017 | 1 | 49.259109 | -123.023633 |
| 2 | VINE ST & W 14TH AVE | Property damage only | VANCOUVER | 2017 | 1 | 49.259265 | -123.157816 |
| 3 | E 13TH AVE & WATSON ST | Property damage only | VANCOUVER | 2017 | 1 | 49.259302 | -123.100497 |
| 4 | ASH ST & W 13TH AVE | Property damage only | VANCOUVER | 2017 | 3 | 49.259493 | -123.117658 |
year_high_crash = mainland_crashes.groupby('Year')["Crash Count"].sum().reset_index().sort_values(by="Crash Count",ascending=False)
year_high_crash['Year']=year_high_crash['Year'].astype(str)
fig = px.line(year_high_crash,x="Year",y='Crash Count',text='Crash Count',title="Total Crash Count per Year (2016 - 2020)")
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False,plot_bgcolor='rgba(0,0,0,0)')
fig.update_yaxes(showgrid=False)
fig.update_xaxes(showgrid=False)
fig.show()
reduction = round((year_high_crash['Crash Count'].iloc[-2]-year_high_crash['Crash Count'].iloc[-1])*100/year_high_crash['Crash Count'].iloc[-2],1)
print('The year with the largest amount of crashes is {}, with total count at {}\n'
.format(year_high_crash['Year'].tolist()[0],year_high_crash["Crash Count"].tolist()[0]))
print('Number of crashes reduced from {} in {} to {} in {}, a reduction of {}%'
.format(year_high_crash['Crash Count'].iloc[-2],year_high_crash['Year'].iloc[-2],
year_high_crash['Crash Count'].iloc[-1], year_high_crash['Year'].iloc[-1],reduction))
The year with the largest amount of crashes is 2016, with total count at 82909 Number of crashes reduced from 71950 in 2019 to 46592 in 2020, a reduction of 35.2%
city_high_crash = mainland_crashes.groupby('Municipality')["Crash Count"].sum().reset_index().sort_values(by="Crash Count",ascending=False)
fig = px.bar(city_high_crash,x="Crash Count",y='Municipality',color='Municipality',text='Crash Count',title="Total Crash Count per City (2016 - 2020)")
fig.update_traces(textposition='outside')
fig.update_layout(showlegend=False,plot_bgcolor='rgba(0,0,0,0)')
fig.update_yaxes(title='',visible=True,showticklabels=True,ticksuffix = " ")
fig.update_xaxes(visible=False,range=[0, 150000],showticklabels=True)
fig.show()
print('The city with the largest amount of crashes is {}, with total count at {}'
.format(city_high_crash['Municipality'].tolist()[0],city_high_crash["Crash Count"].tolist()[0]))
The city with the largest amount of crashes is VANCOUVER, with total count at 132614
location = mainland_crashes.sort_values(by='Crash Count', ascending=False)
print(location[['Location','Municipality','Year','Crash Count']].iloc[0])
print('\nThe location with highest occurrence of accidents was {} in {} in {}, with total crash count of {}'.
format(location['Location'].iloc[0],location['Year'].iloc[0],location['Municipality'].iloc[0],
location['Crash Count'].iloc[0]))
Location KNIGHT ST & KNIGHT ST BRIDGE & SE MARINE DR & ... Municipality VANCOUVER Year 2016 Crash Count 199 Name: 13101, dtype: object The location with highest occurrence of accidents was KNIGHT ST & KNIGHT ST BRIDGE & SE MARINE DR & SE MARINE DR OFFRAMP & SE MARINE DR ONRAMP & BUS RAMP in 2016 in VANCOUVER, with total crash count of 199
location_sum=mainland_crashes.groupby('Location')["Crash Count"].sum().reset_index().sort_values(by="Crash Count",ascending=False)
location_sum= location_sum.reset_index(drop=True)
location_merge = mainland_crashes.drop_duplicates(subset='Location')
location_sum = pd.merge(location_sum,location_merge[['Location','Municipality','Latitude','Longitude']],on='Location',how='inner')
print(location_sum.iloc[0])
print('\nThe location with highest occurrence of accidents was {} in {}, with total crash count of {}'.
format(location_sum['Location'].iloc[0], location_sum['Municipality'].iloc[0],
location_sum['Crash Count'].iloc[0]))
Location KNIGHT ST BRIDGE Crash Count 2362 Municipality VANCOUVER Latitude 49.198601 Longitude -123.077555 Name: 0, dtype: object The location with highest occurrence of accidents was KNIGHT ST BRIDGE in VANCOUVER, with total crash count of 2362
casualty = mainland_crashes.loc[mainland_crashes['Crash Type']=='Casualty']
casualty = casualty.groupby('Year')["Crash Type"].count().reset_index()
casualty
#.groupby('Year')["Crash Type"].count().reset_index()
#mainland_crashes
fig = px.line(casualty,x="Year",y='Crash Type',text='Crash Type',title="Total Casualties Count per Year (2016 - 2020)")
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False,plot_bgcolor='rgba(0,0,0,0)')
fig.update_yaxes(title='Casulaty Count',showgrid=False)
fig.update_xaxes(showgrid=False)
fig.show()
casualty_reduction = round((casualty['Crash Type'].iloc[-2]-casualty['Crash Type'].iloc[-1])*100/casualty['Crash Type'].iloc[-2],1)
print('The year with the largest number of crashes is {}, with total count at {}\n'
.format(casualty['Year'].tolist()[0],casualty["Crash Type"].tolist()[0]))
print('Number of casualties reduced from {} in {} to {} in {}, a reduction of {}%'
.format(casualty['Crash Type'].iloc[-2],casualty['Year'].iloc[-2],
casualty['Crash Type'].iloc[-1], casualty['Year'].iloc[-1],casualty_reduction))
The year with the largest number of crashes is 2016, with total count at 5869 Number of casualties reduced from 5739 in 2019 to 4708 in 2020, a reduction of 18.0%
casualty2 = mainland_crashes.loc[mainland_crashes['Crash Type']=='Casualty']
casualty2 = casualty2.groupby(['Year','Municipality'])["Crash Type"].count().reset_index()
#casualty2
fig = px.line(casualty2,x="Year",y='Crash Type',text='Crash Type',facet_row = 'Municipality',height=2800, width=750,title="Total Casualties Count per Year (2016 - 2020)")
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False,plot_bgcolor='rgba(0,0,0,0)')
fig.update_yaxes(title='Casulaty Count',showgrid=False)
fig.update_xaxes(showgrid=False)
fig.show()
name = mainland_crashes['Municipality'].unique().tolist()
casualty_reduction2=[]
for i in name:
reduction_casualty2 = casualty2.loc[casualty2['Municipality']==i]
reduction2 = round((reduction_casualty2['Crash Type'].iloc[-2]-reduction_casualty2['Crash Type'].iloc[-1])*100/reduction_casualty2['Crash Type'].iloc[-2],1)
casualty_reduction2.append(reduction2)
reduction_dict={'City':name,'Casualty Count':casualty_reduction2}
reduction_df = pd.DataFrame(reduction_dict)
reduction_city_casualty = reduction_df['Casualty Count'].max()
max_reduction = reduction_df.loc[reduction_df['Casualty Count']==reduction_city_casualty]
year_prev = casualty2.loc[casualty2['Municipality']==max_reduction['City'].tolist()[0]]['Year'].tolist()[-2]
print('The city with highest casualty reduction rate is {} with a reduction of {}% in \ncomparison to previous year {}'
.format(max_reduction['City'].tolist()[0],max_reduction['Casualty Count'].tolist()[0],year_prev))
The city with highest casualty reduction rate is NEW WESTMINSTER with a reduction of 23.3% in comparison to previous year 2019
data = location_sum.loc[location_sum["Crash Count"]>500]
m = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(data['Latitude'],data['Longitude'],data['Location'],data["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m)
m
# funtion to filter data based on year and crash count
def year_crash(year):
crash_year = mainland_crashes.groupby(['Year','Location'])['Crash Count'].sum().reset_index().sort_values(by="Crash Count",ascending=False)
crash_num_year = crash_year.loc[crash_year['Year']==year]
map_data_year = crash_num_year.loc[crash_num_year['Crash Count']>100]
map_data_year = pd.merge(map_data_year,location_merge[['Location','Municipality','Latitude','Longitude']],on='Location',how='inner')
return map_data_year
map_data_2016 = year_crash(2016)
map_data_2017 = year_crash(2017)
map_data_2018 = year_crash(2018)
map_data_2019 = year_crash(2019)
map_data_2020 = year_crash(2020)
m_2016 = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(map_data_2016['Latitude'],map_data_2016['Longitude'],map_data_2016['Location'],map_data_2016["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m_2016)
m_2016
m_2017 = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(map_data_2017['Latitude'],map_data_2017['Longitude'],map_data_2017['Location'],map_data_2017["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m_2017)
m_2017
m_2018 = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(map_data_2018['Latitude'],map_data_2018['Longitude'],map_data_2018['Location'],map_data_2018["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m_2018)
m_2018
m_2019 = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(map_data_2019['Latitude'],map_data_2019['Longitude'],map_data_2019['Location'],map_data_2019["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m_2019)
m_2019
m_2020 = folium.Map(location=[49.2497,-123.1193], tiles="Openstreetmap", zoom_start=10)
# HeatMap(data,radius=7).add_to(folium.FeatureGroup(name='Heat Map').add_to(m))
# folium.LayerControl().add_to(m)
#tooltip = "Click me!"
for lat, lon, location, crash_count in zip(map_data_2020['Latitude'],map_data_2020['Longitude'],map_data_2020['Location'],map_data_2020["Crash Count"]):
folium.Marker(
[lat,lon],popup="<b>{}, crash count: {}</b>".format(location,crash_count), icon=folium.Icon(color="red"),
#popup=folium.Popup(max_width=450)#.add_child(
#folium.Vega(vis1, width=450, height=250)
#),
).add_to(m_2020)
m_2020
The city with the largest number of car crashes during the analyzed period (2016 - 2020) was Vancouver, with 132614 counts. As the most important city in the Lower Mainland, people from other cities come to visit for entertainment or work porpouses, contributing to larger daily volumes of cars, thus increasing the likelyhood of a car crash taking place in Vancouver.
The higher number of car crashes reflected in higher number of casualties. Vancouver holds the record of highest number car crash casualties during the analyzed period.
New Westminster recorded the lowest number of car crashes from 2016 to 2020 (12328). This can be explanined by the smaller size of the city compared to other cities in the Lower Mainland.
The data shows a decline trend from 2016 to 2019 which can be attributed to govenment actions and safe driving awareness of drivers.
The data also shows a substancial decline in car crashes from 2019 and 2020. This decline can be mainly explained by the restrictions imposed by the government due to the pandemic that discoureged movement of people between cities.
The effect of the pandemic is more noticible in Vancouver, where no location whithin the limits of the city (excluding roads on the borders) recorded 100 crashes or more in 2020.